rm(list = ls())

library(tidyverse)

## Get data

df <- read_rds('data/mlfz_n_obs_per_county.rds') %>% 
  dplyr::rename(n = 1, county = 2, year = 3) %>% 
  filter(!is.na(year))

## Med per year

mpy <- df %>%
  group_by(year) %>%
  summarise(m = median(n, na.rm = T)) %>%
  ungroup()

qpy <- df %>%
  group_by(year) %>%
  summarise(m = quantile(n, 0.1, na.rm = T)) %>%
  ungroup()

## Plot

p1 <- ggplot(df, aes(x = n)) +
  geom_histogram(
    fill = "grey93",
    color = "black"
  ) +
  geom_vline(
    data = mpy,
    aes(xintercept = m),
    linetype = "dotted"
  ) +
  geom_vline(
    data = qpy,
    aes(xintercept = m),
    linetype = "dashed"
  ) +
  scale_x_log10() +
  facet_wrap(~year) +
  ylab("Number of counties") +
  xlab("Observations per county") +
  theme_bw()
p1
